In [1]:
# import libraries 
import altair as alt
import pandas as pd
In [2]:
# Load the dataset into a Pandas DataFrame
data = pd.read_excel('SFTT.xlsx')
In [3]:
# Remove white spaces from column names
data = data.rename(columns=lambda x: x.strip())

data = data[data['Tree Present'] == True]
data = data.astype({'Postal Code': 'string'})
data['Postal Code'] = data['Postal Code'].apply(lambda x: '{0:0>7}'.format(x))
data['Postal Code'] = data['Postal Code'].str[:-2]
In [4]:
# Drop rows with missing values in the 'Species' column
if 'Species' in data.columns:
    data = data.dropna(subset=['Species'])
In [5]:
# The color messed with the data type
data = data.rename(columns = {'Tree: Tree Condition': "Tree Condition"})
In [6]:
# Clean our data, take out postal codes that are too far from the Mass Ave Corridor
data = data[~data['Postal Code'].isin(['02136', '00021', '000<N', '02129', '02109', '02113', '02122', '02128', '02201', '02215', '02445'])]

postal_codes = data['Postal Code'].unique()
dropdown = alt.binding_select(options = postal_codes)
selection = alt.selection_single(
    fields=["Postal Code"],
    bind=dropdown,
    name="Postal Codes",
)
C:\Users\ethan\anaconda3\lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'selection_single' is deprecated.  Use 'selection_point'
  warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
In [11]:
brush = alt.selection_interval()

trees = alt.Chart(data).mark_circle(size=10).encode(
    longitude="Point X:Q",
    latitude="Point Y:Q",
    color="Postal Code",
    size=alt.Size('Diameter:Q', scale=alt.Scale(range=[10, 150])),
    tooltip=["Genus","Common Name", "Postal Code", "Species"]
).add_params(brush).add_selection(selection
).transform_filter(selection
).properties(title='Trees Across Boston', width=300,
    height=300)

# Create an interactive histogram layer to show the count of trees by zip code
histogram = alt.Chart(data).mark_bar().encode(
    alt.X('count():Q', title='Tree Count'),
    alt.Y('Postal Code:N', title='Zip Code'),
    color = 'Postal Code:N'
    #color=alt.condition(
        #alt.datum.zip_code == trees.encoding['color'].field,
       # alt.value('steelblue'),
       # alt.value('lightgray')
    ).transform_filter(brush).properties(title='Number of Trees by Postal Code', height=300)

common_trees = alt.Chart(data).mark_bar().encode(
    alt.X('Genus:N', title='Genus'),
    alt.Y('count():Q', title='Count'),
    color='Postal Code'
    ).transform_filter(brush).properties(title='Number of Trees by Genus', width=425)


trees & histogram | common_trees
C:\Users\ethan\anaconda3\lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'add_selection' is deprecated. Use 'add_params' instead.
  warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
Out[11]:
In [8]:
# CODE FOR TREEMAP, WASN'T WORKING YET
# base = (
#     alt.Chart(source)
#     .transform_aggregate(count_="count()", groupby=["Origin", "Cylinders"])
#     .transform_stack(
#         stack="count_",
#         as_=["stack_count_Origin1", "stack_count_Origin2"],
#         offset="normalize",
#         sort=[alt.SortField("Origin", "ascending")],
#         groupby=[],
#     )
#     .transform_window(
#         x="min(stack_count_Origin1)",
#         x2="max(stack_count_Origin2)",
#         rank_Cylinders="dense_rank()",
#         distinct_Cylinders="distinct(Cylinders)",
#         groupby=["Origin"],
#         frame=[None, None],
#         sort=[alt.SortField("Cylinders", "ascending")],
#     )
#     .transform_window(
#         rank_Origin="dense_rank()",
#         frame=[None, None],
#         sort=[alt.SortField("Origin", "ascending")],
#     )
#     .transform_stack(
#         stack="count_",
#         groupby=["Origin"],
#         as_=["y", "y2"],
#         offset="normalize",
#         sort=[alt.SortField("Cylinders", "ascending")],
#     )
#     .transform_calculate(
#         ny="datum.y + (datum.rank_Cylinders - 1) * datum.distinct_Cylinders * 0.01 / 3",
#         ny2="datum.y2 + (datum.rank_Cylinders - 1) * datum.distinct_Cylinders * 0.01 / 3",
#         nx="datum.x + (datum.rank_Origin - 1) * 0.01",
#         nx2="datum.x2 + (datum.rank_Origin - 1) * 0.01",
#         xc="(datum.nx+datum.nx2)/2",
#         yc="(datum.ny+datum.ny2)/2",
#     )
# )


# rect = base.mark_rect().encode(
#     x=alt.X("nx:Q").axis(None),
#     x2="nx2",
#     y="ny:Q",
#     y2="ny2",
#     color=alt.Color("Origin:N").legend(None),
#     opacity=alt.Opacity("Cylinders:Q").legend(None),
#     tooltip=["Origin:N", "Cylinders:Q"],
# )


# text = base.mark_text(baseline="middle").encode(
#     alt.X("xc:Q").axis(None),
#     alt.Y("yc:Q").title("Cylinders"),
#     text="Cylinders:N"
# )

# mosaic = rect + text

# origin_labels = base.mark_text(baseline="middle", align="center").encode(
#     alt.X("min(xc):Q").title("Origin").axis(orient="top"),
#     alt.Color("Origin").legend(None),
#     text="Origin",
# )

# (
#     (origin_labels & mosaic)
#     .resolve_scale(x="shared")
#     .configure_view(stroke="")
#     .configure_concat(spacing=10)
#     .configure_axis(domain=False, ticks=False, labels=False, grid=False)
# )
In [10]:
chart = alt.vconcat(trees & histogram | common_trees)
chart.save('viz.html')
In [ ]: